In [1]:
import boto3
import json
import pandas as pd
import plotly.graph_objs as go
import plotly.express as ex

from botocore.exceptions import ClientError
from io import StringIO
from plotly.subplots import make_subplots
In [ ]:
 
In [2]:
def get_secret(secret_name="s3_data_analysis", region_name="us-east-1"):
    """Build an S3 client from a key pair stored in AWS Secrets Manager.

    The secret is fetched with the session's default credentials, parsed as a
    JSON object, and its ``aws_access_key_id`` / ``aws_secret_access_key``
    entries are used to construct the S3 client.

    Args:
        secret_name: Identifier of the secret to read.
        region_name: Region of the Secrets Manager endpoint.

    Returns:
        A boto3 S3 client created with the stored key pair.

    Raises:
        botocore.exceptions.ClientError: If the secret cannot be retrieved.
        KeyError: If the secret has no string payload or lacks a required key.
        json.JSONDecodeError: If the secret string is not valid JSON.
    """
    # Secrets Manager client built from the default credential chain.
    secrets_client = boto3.session.Session().client(
        service_name='secretsmanager',
        region_name=region_name,
    )

    # ClientError is left to propagate unchanged; catching it only to re-raise
    # the same object added nothing.
    response = secrets_client.get_secret_value(SecretId=secret_name)
    if 'SecretString' not in response:
        raise KeyError(
            f"Secret {secret_name!r} has no 'SecretString' payload (binary secret?)"
        )

    credentials = json.loads(response['SecretString'])

    required_keys = ('aws_access_key_id', 'aws_secret_access_key')
    missing = [key for key in required_keys if key not in credentials]
    if missing:
        # Report only key names, never the secret values themselves.
        raise KeyError(f"Secret {secret_name!r} is missing required keys: {missing}")

    # Use the retrieved key pair to create the S3 client.
    return boto3.client(
        's3',
        aws_access_key_id=credentials['aws_access_key_id'],
        aws_secret_access_key=credentials['aws_secret_access_key'],
    )

# Shared S3 client; the listing and download cells below pass it along.
s3_client = get_secret()
In [3]:
# Bucket that the cells below list and read objects from.
bucket_name = 'nk.data.analysis'
# Key prefix that is joined with '/' and a file name to form each object key.
path = 'COVID_Analysis/covid_analysis'
In [4]:
# List every object under `path` and keep only the name relative to it.
# The listing prefix is derived from `path` so that each name can be turned
# back into a valid key with `path + '/' + name`; listing a wider prefix would
# yield names that cannot be resolved that way. A paginator is used because a
# single list_objects_v2 call returns at most 1000 keys.
key_prefix = path + '/'
files = [
    obj["Key"].removeprefix(key_prefix)
    for page in s3_client.get_paginator("list_objects_v2").paginate(
        Bucket=bucket_name, Prefix=key_prefix
    )
    for obj in page.get("Contents", [])
    if not obj["Key"].endswith("/")  # skip "folder" placeholder keys
]
In [5]:
def read_csv_from_s3(bucket_name, key, s3_client):
    """Fetch one S3 object and parse it as CSV.

    Args:
        bucket_name: Bucket holding the object.
        key: Full object key.
        s3_client: Client exposing ``get_object(Bucket=..., Key=...)``.

    Returns:
        The parsed DataFrame, or None when fetching, decoding or parsing
        fails (the error is printed, not raised).
    """
    try:
        s3_object = s3_client.get_object(Bucket=bucket_name, Key=key)
        raw_bytes = s3_object['Body'].read()
        # The body is decoded as UTF-8 and handed to pandas as a text buffer.
        return pd.read_csv(StringIO(raw_bytes.decode('utf-8')))
    except Exception as err:
        print(f"Error reading CSV from S3: {err}")
        return None
In [6]:
# Load every listed file into a module-level variable named after the file.
for file in files:
    # Drop the '.csv' text and swap '-' for '_' to build the variable name.
    var_name = file.replace('.csv', '').replace('-', '_')  # Ensure valid variable names
    # Bind the result as a global under that name; presumably this is where
    # `worldometer_data`, `day_wise` and `full_grouped` used below come from.
    # NOTE(review): read_csv_from_s3 returns None on failure, so a failed
    # download binds the name to None instead of stopping here.
    globals()[var_name] = read_csv_from_s3(bucket_name, path + '/' + file, s3_client)

# Preview the first rows as the cell output.
worldometer_data.head()
Out[6]:
Country/Region Continent Population TotalCases NewCases TotalDeaths NewDeaths TotalRecovered NewRecovered ActiveCases Serious,Critical Tot Cases/1M pop Deaths/1M pop TotalTests Tests/1M pop WHO Region
0 USA North America 3.311981e+08 5032179 NaN 162804.0 NaN 2576668.0 NaN 2292707.0 18296.0 15194.0 492.0 63139605.0 190640.0 Americas
1 Brazil South America 2.127107e+08 2917562 NaN 98644.0 NaN 2047660.0 NaN 771258.0 8318.0 13716.0 464.0 13206188.0 62085.0 Americas
2 India Asia 1.381345e+09 2025409 NaN 41638.0 NaN 1377384.0 NaN 606387.0 8944.0 1466.0 30.0 22149351.0 16035.0 South-EastAsia
3 Russia Europe 1.459409e+08 871894 NaN 14606.0 NaN 676357.0 NaN 180931.0 2300.0 5974.0 100.0 29716907.0 203623.0 Europe
4 South Africa Africa 5.938157e+07 538184 NaN 9604.0 NaN 387316.0 NaN 141264.0 539.0 9063.0 162.0 3149807.0 53044.0 Africa
In [7]:
# One treemap per metric, restricted to the first 20 rows of the table.
columns = ['TotalCases', 'TotalDeaths', 'TotalRecovered', 'ActiveCases']
for col in columns:
    fig = ex.treemap(
        worldometer_data.iloc[0:20],
        path=['Country/Region'],
        values=col,
        title=f'Treemap representation of countries with regard to {col}',
    )
    fig.show()
In [8]:
# Plot the four case-count columns of `day_wise` against 'Date'.
ex.line(
    day_wise,
    x='Date',
    y=['Confirmed', 'Deaths', 'Recovered', 'Active'],
    title='COVID with regard to Date',
    template='plotly_dark',
)
In [ ]:
 
In [9]:
# Population-to-tests ratio for the first 20 rows. The quotient is
# parenthesised before slicing: `.iloc[...]` binds tighter than `/`, so without
# the parentheses only the denominator is sliced and the result is a
# full-length series padded with NaN.
pop_test_ratio = (worldometer_data['Population'] / worldometer_data['TotalTests']).iloc[0:20]
In [10]:
# Bar chart of the population-to-tests ratio for the first 20 rows,
# coloured by 'Country/Region'.
fig = ex.bar(
    worldometer_data.iloc[0:20],
    x='Country/Region',
    y=pop_test_ratio.iloc[0:20],
    color='Country/Region',
    title='Population:Tests Done',
)
fig.show()
In [ ]:
 
In [11]:
# Bar chart of five case columns per country for the first 20 rows.
ex.bar(
    worldometer_data.iloc[0:20],
    x='Country/Region',
    y=['Serious,Critical', 'TotalDeaths', 'TotalRecovered', 'ActiveCases', 'TotalCases'],
)
In [ ]:
 
In [12]:
# Top 20 rows by TotalCases. The frame is sorted explicitly, as the other
# "top 20" cells do, so the chart does not depend on the row order of the file.
fig = ex.bar(
    worldometer_data.sort_values(by='TotalCases', ascending=False)[0:20],
    x='TotalCases',
    y='Country/Region',
    text='TotalCases',
    color='TotalCases',
)
fig.update_layout(template='plotly_dark', title_text='Top 20 countries of confirmed COVID cases')
fig.show()
In [ ]:
 
In [13]:
# Top 20 rows by TotalDeaths, drawn as horizontal bars.
fig = ex.bar(
    worldometer_data.sort_values(by='TotalDeaths', ascending=False).head(20),
    x='TotalDeaths',
    y='Country/Region',
    text='TotalDeaths',
    color='TotalDeaths',
)
fig.update_layout(
    template='plotly_dark',
    title_text='Top 20 countries of total death COVID cases',
)
fig.show()
In [ ]:
 
In [14]:
# Top 20 rows by ActiveCases, drawn as horizontal bars.
fig = ex.bar(
    worldometer_data.sort_values(by='ActiveCases', ascending=False).head(20),
    x='ActiveCases',
    y='Country/Region',
    text='ActiveCases',
    color='ActiveCases',
)
fig.update_layout(
    template='plotly_dark',
    title_text='Top 20 countries of total active COVID cases',
)
fig.show()
In [ ]:
 
In [15]:
# Top 20 rows by TotalRecovered, drawn as horizontal bars.
fig = ex.bar(
    worldometer_data.sort_values(by='TotalRecovered', ascending=False).head(20),
    x='TotalRecovered',
    y='Country/Region',
    text='TotalRecovered',
    color='TotalRecovered',
)
fig.update_layout(
    template='plotly_dark',
    title_text='Top 20 countries of total recovered COVID cases',
)
fig.show()
In [ ]:
 
In [16]:
# Donut charts over the first 15 rows, one per metric; the slices are
# labelled with the 'Country/Region' values of those rows.
cases = ['TotalCases', 'TotalDeaths', 'TotalRecovered', 'ActiveCases']
first_15_rows = worldometer_data[0:15]
labels = first_15_rows['Country/Region'].values
for case in cases:
    fig = ex.pie(
        first_15_rows,
        values=case,
        names=labels,
        hole=0.3,
        title=f'{case} worst 15 affected countries',
    )
    fig.show()
In [ ]:
 
In [17]:
# Row-wise ratio of TotalDeaths to TotalCases.
deaths_to_confirmed_cases = worldometer_data['TotalDeaths'].div(worldometer_data['TotalCases'])
In [18]:
# Bar chart of the deaths-to-confirmed ratio for every row.
ex.bar(
    worldometer_data,
    x='Country/Region',
    y=deaths_to_confirmed_cases,
    title='Death to Confirmed Cases by Worst Country',
)
In [19]:
# Row-wise ratio of TotalDeaths to TotalRecovered, charted per country.
deaths_to_recovered_cases = worldometer_data['TotalDeaths'].div(worldometer_data['TotalRecovered'])
ex.bar(
    worldometer_data,
    x='Country/Region',
    y=deaths_to_recovered_cases,
    title='Death to Recovered Cases by Worst Country',
)
In [ ]:
 
In [20]:
# Row-wise ratio of TotalTests to TotalCases, charted per country.
tests_to_confirmed_cases = worldometer_data['TotalTests'].div(worldometer_data['TotalCases'])
ex.bar(
    worldometer_data,
    x='Country/Region',
    y=tests_to_confirmed_cases,
    title='Tests to Confirmed Cases by Worst Country',
)
In [ ]:
 
In [21]:
# Row-wise ratio of 'Serious,Critical' to TotalDeaths, charted per country.
serious_to_death_cases = worldometer_data['Serious,Critical'].div(worldometer_data['TotalDeaths'])
ex.bar(
    worldometer_data,
    x='Country/Region',
    y=serious_to_death_cases,
    title='Serious/Critical to Deaths by Worst Country',
)
In [ ]:
 
In [22]:
def country_visualization(df, country):
    """Plot Confirmed, Active, Recovered and Deaths over time for one country.

    Draws a single row of four line plots, one per metric, against 'Date',
    and shows the figure.

    Args:
        df: DataFrame with 'Country/Region', 'Date', 'Confirmed', 'Deaths',
            'Recovered' and 'Active' columns.
        country: Value matched exactly against the 'Country/Region' column.

    Raises:
        ValueError: If no row of ``df`` matches ``country``.
    """
    country_rows = df.loc[
        df['Country/Region'] == country,
        ['Date', 'Confirmed', 'Deaths', 'Recovered', 'Active'],
    ]
    if country_rows.empty:
        # Fail loudly instead of rendering four empty panels, e.g. for a
        # misspelt or differently named country.
        raise ValueError(f"No rows found for country {country!r} in 'Country/Region'")

    # Panel order, left to right; each entry is the subplot title, the trace
    # name and the column that is plotted.
    metrics = ("Confirmed", "Active", "Recovered", "Deaths")
    fig = make_subplots(rows=1, cols=len(metrics), subplot_titles=metrics)

    for column_index, metric in enumerate(metrics, start=1):
        fig.add_trace(
            go.Scatter(name=metric, x=country_rows['Date'], y=country_rows[metric]),
            row=1, col=column_index,
        )

    fig.update_layout(
        height=600,
        width=1000,
        title_text="Date Vs Recorded Cases of {}".format(country),
        template="plotly_dark",
    )
    fig.show()
In [ ]:
 
In [23]:
# Per-country time series for Brazil.
country_visualization(df=full_grouped, country='Brazil')
In [ ]:
 
In [24]:
# Per-country time series for the rows labelled 'US'.
country_visualization(df=full_grouped, country='US')